# Clear the workspace, keeping only `drop_attn_fails` if it exists.
rm(list = setdiff(ls(), "drop_attn_fails"))
library(tidyverse)
library(readr)
library(psych)
library(ggcorrplot)
library(survey)
library(cregg)
library(tidycat)
library(broom)
library(anesrake)

## Load conjoint attributes from source files
##
## Each country file is sourced in turn and the objects it leaves behind
## (`parties`, `class`, `ethnicities`, ...) are copied to country-suffixed names
## straight away. The three files presumably define objects with the same
## names, so every copy has to be taken before the next source() call
## overwrites them -- do not reorder these statements.
## NOTE(review): `class` is expected to come from the sourced file; if a file
## did not define it, `class` would resolve to base R's class() function.

# UK attribute levels
source("plumber_uk.R")
parties_uk <- parties
class_uk <- class
ethnicities_uk <- ethnicities
genders_uk <- genders
sexuality_uk <- sexuality
justification_uk <- justification
personalization_uk <- personalization
responsiveness_uk <- responsiveness
substantive_uk <- substantive
surrogation_uk <- surrogation
# All descriptive attribute levels in one vector: gender, class, ethnicity, sexuality
descriptive_uk <- c(genders_uk, class_uk, ethnicities_uk, sexuality_uk)

# DE attribute levels; justification, personalization and surrogation are
# taken from the `*_masculine` objects rather than the unsuffixed ones
source("plumber_de.R")
parties_de <- parties
class_de <- class
ethnicities_de <- ethnicities
genders_de <- genders
sexuality_de <- sexuality
justification_de <- justification_masculine
personalization_de <- personalization_masculine
responsiveness_de <- responsiveness
substantive_de <- substantive
surrogation_de <- surrogation_masculine
descriptive_de <- c(genders_de, class_de, ethnicities_de, sexuality_de)

# US attribute levels
source("plumber_us.R")
parties_us <- parties
class_us <- class
ethnicities_us <- ethnicities
genders_us <- genders
sexuality_us <- sexuality
justification_us <- justification
personalization_us <- personalization
responsiveness_us <- responsiveness
substantive_us <- substantive
surrogation_us <- surrogation
descriptive_us <- c(genders_us, class_us, ethnicities_us, sexuality_us)


# Column names of the agree/disagree item batteries, built from their shared
# prefix and running item number.
sub_rep_names  <- paste0("sub_rep_1_", 1:4)
surr_rep_names <- paste0("surr_rep_1_", 1:3)
desc_rep_names <- paste0("desc_rep_1_", 1:5)
just_rep_names <- paste0("just_rep1_", 1:4)
pers_rep_names <- paste0("pers_rep1_", 1:4)
resp_rep_names <- paste0("respons_rep1_", 1:3)

# Every battery item, in the order the batteries are listed above
all_names <- c(
  sub_rep_names,
  surr_rep_names,
  desc_rep_names,
  just_rep_names,
  pers_rep_names,
  resp_rep_names
)

## ###########
## Clean data

## Load survey data 

# UK

# Cleaned UK respondent data: reads the raw export, drops its first two rows
# (not respondent records -- presumably the extra header rows of the export),
# keeps eligible respondents and derives the analysis variables.
uk <- read_csv("../data/Measuring+Multidimensional+Representation+-+Conjoint+UK_February+9%2C+2023_07.24.csv") %>% slice(3:nrow(.)) %>%
  # Keep respondents who consented, reported a right to vote and reached 100% progress
  filter(Consent != "I do not want to participate in this study." & 
           Right_to_vote != "None of the above" & 
           Progress == 100) %>%
  mutate(dur = as.numeric(Q_TotalDuration),
         # Fall back to the country field when no region was recorded
         region = ifelse(!is.na(result.0.region), result.0.region, result.0.country),
         # Age is computed relative to 2021 from the reported birth year
         age = 2021 - as.numeric(Birth),
         age_cat = factor(case_when(age >=18 & age <= 24 ~ "18-24",
                             age >=25 & age <= 34 ~ "25-34",
                             age >=35 & age <= 44 ~ "35-44",
                             age >=45 & age <= 54 ~ "45-54",
                             age >=55 & age <= 64 ~ "55-64",
                             age >=65 ~ "65+"), levels = c("18-24", "25-34", "35-44", "45-54", "55-64", "65+")),
         # Collapse the detailed qualification list into broad levels
         educ = recode(Education,
                       "No qualification" = "No qual",
                       "GCSE D-G, CSE grades 2-5, O level D-E" = "Level 1",
                       "City & Guilds level 1, NVQ/SVQ 1 and equivalent" = "Level 1",
                       "GCSE A*-C, CSE grade 1, O level grade A-C" = "Level 2",
                       "Scottish Standard grades, Ordinary bands" = "Level 2",
                       "City & Guilds level 2, NVQ/SVQ 2 and equivalent" = "Level 2",
                       "A level or equivalent" = "Level 3",
                       "Scottish higher" = "Level 3",
                       "ONC/OND, City & Guilds level 3, NVQ/SVQ 3" = "Level 3",
                       "University degree (for example BA, BSc), postgraduate degree (for example MA, PhD)" = "Level 4",
                       "University/Polytechnic diploma" = "Level 4",
                       "Teaching qualification or nursing qualification" = "Level 4",
                       "HNC/HND, City & Guilds level 4, NVQ/SVQ 4/5" = "Level 4",
                       "Other qualification" = "Other",
                       "Apprenticeship" = "Other"
                       ),
         # Any value not listed in `levels` becomes NA
         educ = factor(educ, levels = c("No qual", "Level 1", "Level 2", "Level 3", "Level 4", "Other")),
         Class = factor(Class, levels = c("Working class", "Middle class", "Upper class")),
         Gender = factor(Gender, levels = c("Male", "Female")),
         # Recalled general-election vote, collapsed to five categories
         RetroVote = case_when(gen_election_retro == "Conservative" ~ "Conservative",
                               gen_election_retro == "Labour" ~ "Labour",
                               gen_election_retro == "Liberal Democrat" ~ "Liberal Democrat",
                               gen_election_retro %in% c("Brexit Party", "Green", "Other", "Scottish National Party (SNP)", "Plaid Cymru") ~ "Other",
                               gen_election_retro %in% c("Did not vote", "Was not eligible to vote") ~ "Did not vote"),
         # Order matters: the "Black" and "Asian" patterns are tried before the
         # catch-all, and everything unmatched (including missing) is "Other"
         ethnicity = case_when(Ethnicity == "White British" ~ "White",
                               Ethnicity == "Any other white background" ~ "White",
                               grepl("black|Black", Ethnicity) ~ "Black",
                               grepl("Asian|Bangladeshi|Indian|Pakistani|Chinese", Ethnicity) ~ "Asian",
                               TRUE ~ "Other"
                               ),
         # "Liberal Democrat" must be listed here: the recode above produces it,
         # and factor() turns any value missing from `levels` into NA
         RetroVote = factor(RetroVote, levels = c("Conservative", "Labour", "Liberal Democrat", "Other", "Did not vote")),
         ethnicity = factor(ethnicity, levels = c("White", "Black", "Asian", "Other")),
         # Strip the textual scale-endpoint labels so the answers parse as numbers
         general_ideology = as.numeric(gsub(" - left| - right", "", general_ideology)),
         # Keep the raw trust answer before it is overwritten with its numeric form
         political_trust_tmp = political_trust,
         political_trust = as.numeric(gsub(" - Do not trust at all| - Trust a great deal", "", political_trust)),
         political_interest = factor(political_interest, levels = c("Not at all interested", "Not very interested", "Somewhat interested", "Very interested")),
         saw_conjoint_first = FL_63_DO == "Conjointvignette|FL_23") %>%
  # Numeric 1-5 versions of every battery item, stored as `<item>_num`
  mutate(across(all_of(all_names), 
                ~as.numeric(factor(.x,levels = c("Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree"))), 
                .names = "{.col}_num")) %>%
  filter(!is.na(satisf_dem) | !is.na(salience_1) | !is.na(general_ideology)) %>% # Drops erroneous Scot/Wales/Ire respondents
  filter(dur >= 300) # Drops speeders

# DE

# Cleaned German respondent data, sorted by survey start time.
de <- read_csv("../data/Measuring+Multidimensional+Representation+-+Conjoint+DE_February+9%2C+2023_07.49.csv") %>%
  slice(3:nrow(.)) %>%
  # Keep finished interviews with consent, birth year, gender, rid and
  # education all present, from respondents who agreed to take part and did
  # not answer "Nein" on the right-to-vote item
  filter(
    !is.na(Consent),
    !is.na(Birth),
    !is.na(Gender),
    !is.na(rid),
    Finished == "True",
    Consent != "Ich möchte nicht an dieser Studie teilnehmen.",
    Right_to_vote != "Nein",
    !is.na(Education)
  ) %>%
  # Speeders are dropped on Q_TotalDuration
  filter(as.numeric(Q_TotalDuration) >= 300) %>%
  # Timing and age (relative to 2021)
  mutate(
    start_time = as.POSIXct(StartDate),
    dur = as.numeric(`Duration (in seconds)`),
    age = 2021 - as.numeric(Birth)
  ) %>%
  # Two age groupings: `age_cat` pools everyone from 55, `age_cat_b` keeps
  # 55-64 and 65+ apart
  mutate(
    age_cat = factor(
      case_when(
        between(age, 18, 24) ~ "18-24",
        between(age, 25, 34) ~ "25-34",
        between(age, 35, 44) ~ "35-44",
        between(age, 45, 54) ~ "45-54",
        between(age, 55, 64) ~ "55+",
        age >= 65 ~ "55+"
      ),
      levels = c("18-24", "25-34", "35-44", "45-54", "55+")
    ),
    age_cat_b = factor(
      case_when(
        between(age, 18, 24) ~ "18-24",
        between(age, 25, 34) ~ "25-34",
        between(age, 35, 44) ~ "35-44",
        between(age, 45, 54) ~ "45-54",
        between(age, 55, 64) ~ "55-64",
        age >= 65 ~ "65+"
      ),
      levels = c("18-24", "25-34", "35-44", "45-54", "55-64", "65+")
    )
  ) %>%
  # Federal states grouped into eight regions
  mutate(
    region = case_when(
      Region %in% c("Hamburg", "Bremen", "Schleswig-Holstein", "Niedersachsen") ~ "North",
      Region %in% "Baden-Württemberg" ~ "Baden-Württemberg",
      Region %in% c("Hessen", "Rheinland-Pfalz", "Saarland") ~ "Mid-West",
      Region %in% "Bayern" ~ "Bavaria",
      Region %in% "Berlin" ~ "Berlin",
      Region %in% c("Brandenburg", "Mecklenburg-Vorpommern", "Sachsen-Anhalt") ~ "North-East",
      Region %in% "Nordrhein-Westfalen" ~ "Northrhine-Westphalia",
      Region %in% c("Sachsen", "Thüringen") ~ "Mid-East"
    )
  ) %>%
  # Schooling collapsed to five groups; factor levels are left in default order
  mutate(
    educ = factor(
      case_when(
        Education %in% c(
          "Von der Schule abgegangen ohne Abschluss, vor Erreichen einer 8., 9., 10. oder 11. Klasse",
          "Hauptschul- oder Volksschulabschluss"
        ) ~ "Hauptschul",
        Education %in% "Mittlere Reife, Realschulabschluss, Fachschulreife oder gleichwertiger Abschluss" ~ "Mittlere Reife",
        Education %in% "Abschluss der polytechnischen Oberschule" ~ "Abschluss",
        Education %in% c(
          "Noch in schulischer Ausbildung",
          "Abitur, allgemeine Hochschulreife oder Fachhochschulreife"
        ) ~ "Noch in schulischer",
        Education %in% "Studienabschluss (z.B. Diplom, Magister, Bachelor, Master) oder Promotion" ~ "Studienabschluss"
      )
    )
  ) %>%
  # Political and background variables
  mutate(
    # Remove the textual endpoint labels before converting to numbers
    general_ideology = as.numeric(gsub(" - rechts| - links", "", general_ideology)),
    # Any Migr_back answer containing "Ja," counts as migrant background;
    # everything else, including missing, does not
    ethnicity = factor(
      case_when(
        grepl("Ja,", Migr_back) ~ "Migrant background",
        TRUE ~ "No migrant background"
      ),
      levels = c("No migrant background", "Migrant background")
    ),
    RetroVote = factor(
      case_when(
        gen_election_retro == "CDU/CSU" ~ "CDU/CSU",
        gen_election_retro == "SPD" ~ "SPD",
        gen_election_retro %in% c("AfD", "Bündnis 90/Die Grünen", "Die Linke", "FDP", "Sonstige") ~ "Other",
        gen_election_retro %in% c("Ich habe nicht gewählt", "Ich war nicht wahlberechtigt") ~ "Did not vote"
      )
    ),
    Class = factor(Class),
    Gender = factor(Gender, levels = c("Männlich", "Weiblich")),
    political_interest = factor(
      political_interest,
      levels = c("Überhaupt nicht interessiert", "Nicht sehr interessiert", "Etwas interessiert", "Sehr interessiert")
    ),
    political_trust = as.numeric(gsub(" - Überhaupt kein Vertrauen| - Sehr viel Vertrauen", "", political_trust)),
    saw_conjoint_first = FL_62_DO == "Conjointvignette|FL_23"
  ) %>%
  # Numeric 1-5 versions of every battery item, stored as `<item>_num`
  mutate(
    across(
      all_of(all_names),
      function(answer) {
        as.numeric(factor(
          answer,
          levels = c("Stimme überhaupt nicht zu", "Stimme nicht zu", "Stimme weder zu noch nicht zu", "Stimme zu", "Stimme voll und ganz zu")
        ))
      },
      .names = "{.col}_num"
    )
  ) %>%
  arrange(start_time)


# US

# Cleaned US respondent data.
us <- read_csv("../data/Measuring+Multidimensional+Representation+-+Conjoint+USA_February+9%2C+2023_06.44.csv") %>%
  slice(3:nrow(.)) %>%
  # Keep respondents who consented, did not answer "No" on the right-to-vote
  # item and reached 100% progress
  filter(
    Consent != "I do not want to participate in this study.",
    Right_to_vote != "No",
    Progress == 100
  ) %>%
  # Timing and age (relative to 2021)
  mutate(
    dur = as.numeric(`Duration (in seconds)`),
    age = 2021 - as.numeric(Birth),
    age_cat = factor(
      case_when(
        between(age, 18, 24) ~ "18-24",
        between(age, 25, 34) ~ "25-34",
        between(age, 35, 44) ~ "35-44",
        between(age, 45, 54) ~ "45-54",
        between(age, 55, 64) ~ "55-64",
        age >= 65 ~ "65+"
      ),
      levels = c("18-24", "25-34", "35-44", "45-54", "55-64", "65+")
    )
  ) %>%
  # States grouped into four regions; unlisted or missing states stay NA
  mutate(
    region = case_when(
      State %in% c(
        "Connecticut", "Maine", "Massachusetts", "New Hampshire", "New Jersey",
        "New York", "Pennsylvania", "Rhode Island", "Vermont"
      ) ~ "North-East",
      State %in% c(
        "Illinois", "Indiana", "Iowa", "Kansas", "Michigan", "Minnesota",
        "Missouri", "Nebraska", "North Dakota", "Ohio", "South Dakota", "Wisconsin"
      ) ~ "Midwest",
      State %in% c(
        "Alabama", "Arkansas", "Delaware", "District of Columbia", "Florida",
        "Georgia", "Kentucky", "Louisiana", "Maryland", "Mississippi",
        "North Carolina", "Oklahoma", "South Carolina", "Tennessee", "Texas",
        "Virginia", "West Virginia", "Puerto Rico"
      ) ~ "South",
      State %in% c(
        "Alaska", "Arizona", "California", "Colorado", "Hawaii", "Idaho",
        "Montana", "Nevada", "New Mexico", "Oregon", "Utah", "Washington", "Wyoming"
      ) ~ "West"
    )
  ) %>%
  # Education collapsed to five ordered groups; answers not renamed by recode()
  # pass through unchanged before the factor levels are applied
  mutate(
    educ = factor(
      recode(
        Education,
        "None or up to grade 8" = "None or up to grade 11",
        "Grades 9, 10 or 11" = "None or up to grade 11",
        "Associate's degree (for example AA, AS)" = "College degree",
        "Bachelor's degree (for example BA, BS)" = "College degree",
        "Professional degree beyond a Bachelor's degree (for example MD, DDS, DVM, LLB, JD)" = "Master's degree and above",
        "Master's degree (for example MA, MS, MEng, Med, MSW, MBA)" = "Master's degree and above",
        "Doctoral degree (for example PhD, EdD)" = "Master's degree and above"
      ),
      levels = c("None or up to grade 11", "High school graduate", "Some college, no degree", "College degree", "Master's degree and above")
    )
  ) %>%
  # Political and background variables
  mutate(
    RetroVote = factor(
      case_when(
        house_election_retro == "Democratic candidate" ~ "Democrat",
        house_election_retro == "Republican candidate" ~ "Republican",
        house_election_retro %in% c("Independent candidate", "Other candidate") ~ "Other",
        house_election_retro %in% c("Did not vote", "Was not eligible to vote") ~ "Did not vote"
      ),
      levels = c("Democrat", "Republican", "Other", "Did not vote")
    ),
    general_ideology = factor(
      general_ideology,
      levels = c("Very Liberal", "Liberal", "Moderate", "Conservative", "Very Conservative")
    ),
    # Remove the textual endpoint labels before converting to numbers
    political_trust = as.numeric(gsub(" - Do not trust at all| - Trust a great deal", "", political_trust)),
    political_interest = factor(
      political_interest,
      levels = c("Not at all interested", "Not very interested", "Somewhat interested", "Very interested")
    ),
    Gender = factor(Gender, levels = c("Male", "Female")),
    # White and Black are matched first, so the Hispanic == "Yes" rule only
    # reaches respondents outside those two groups
    ethnicity = factor(
      case_when(
        Ethnicity == "White" ~ "White",
        Ethnicity == "Black" ~ "Black",
        Ethnicity == "Hispanic" | Hispanic == "Yes" ~ "Hispanic",
        TRUE ~ "Other"
      ),
      levels = c("White", "Black", "Hispanic", "Other")
    ),
    Class = factor(Class, levels = c("Lower class", "Working class", "Middle class", "Upper class")),
    saw_conjoint_first = FL_30_DO == "Conjointvignette|FL_12"
  ) %>%
  # Numeric 1-5 versions of every battery item, stored as `<item>_num`
  mutate(
    across(
      all_of(all_names),
      function(answer) {
        as.numeric(factor(
          answer,
          levels = c("Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree")
        ))
      },
      .names = "{.col}_num"
    )
  ) %>%
  # Speeders: duration below 300
  filter(dur >= 300)

# Survey weights from raking to population margins

# Only respondents who gave the "agree" answer on pers_rep1_5 enter the raking
uk_short <- filter(uk, pers_rep1_5 == "Agree")
us_short <- filter(us, pers_rep1_5 == "Agree")
de_short <- filter(de, pers_rep1_5 == "Stimme zu")

# Subsets keyed by country code
short_data <- list(UK = uk_short, US = us_short, DE = de_short)

#' Rake one country's subsample to population margins
#'
#' Builds a unit-weight survey design (clustered on `rid`) from
#' `short_data[[country]]`, reads target proportions from the sheets
#' "age_gender", "region" and "educ" of "../data/<lower-case country>.xlsx",
#' scales them to the subsample size, and rakes on region, education and
#' age category by gender.
#'
#' The workbook sheets must provide a `target_prop` column plus `region`,
#' `educ`, and `Age`/`Gender` respectively; the category labels are presumably
#' expected to match the values of `region`, `educ`, `age_cat` and `Gender` in
#' the data.
#'
#' @param country Name of one element of the global list `short_data`
#'   ("UK", "US" or "DE" as defined in this script).
#' @return The raked weights, as returned by `weights()` on the raked design
#'   (one per row of `short_data[[country]]`).
get_weights <- function(country = "UK") {
  stopifnot(is.character(country), length(country) == 1L)
  if (!country %in% names(short_data)) {
    stop(
      "Unknown country '", country, "'; expected one of: ",
      paste(names(short_data), collapse = ", "),
      call. = FALSE
    )
  }

  dat <- short_data[[country]]
  n_obs <- nrow(dat)
  targets_path <- paste0("../data/", tolower(country), ".xlsx")

  # Every respondent starts with weight 1; `ids` is spelled out instead of
  # relying on partial matching of `id`
  design <- svydesign(ids = ~rid, data = dat, weights = rep(1, n_obs))

  age_gender <- readxl::read_excel(targets_path, sheet = "age_gender")
  region <- readxl::read_excel(targets_path, sheet = "region")
  education <- readxl::read_excel(targets_path, sheet = "educ")

  # rake() wants margins as counts in a `Freq` column, so the target
  # proportions are scaled to the subsample size
  region <- data.frame(region = region$region, Freq = region$target_prop * n_obs)
  education <- data.frame(educ = education$educ, Freq = education$target_prop * n_obs)
  age_gender <- data.frame(
    age_cat = age_gender$Age,
    Gender = age_gender$Gender,
    Freq = age_gender$target_prop * n_obs
  )

  raked <- survey::rake(
    design,
    list(~region, ~educ, ~age_cat + Gender),
    list(region, education, age_gender)
  )

  weights(raked)
}

# Attach the raked weights to the full data sets. `wgt` starts out as NA for
# everyone and is filled in only for the rows that were passed to the raking
# (same pers_rep1_5 condition as used to build `short_data`).
uk$wgt <- NA
uk$wgt[uk$pers_rep1_5 %in% "Agree"] <- get_weights("UK")

us$wgt <- NA
us$wgt[us$pers_rep1_5 %in% "Agree"] <- get_weights("US")

de$wgt <- NA
de$wgt[de$pers_rep1_5 %in% "Stimme zu"] <- get_weights("DE")

# Store the three weighted data sets together
save(uk, de, us, file = "../working/survey_data_for_quota_checks.Rdata")

